#Alexander F. Gazmararian
#afg2@princeton.edu

#Load packages
library(janitor)
library(tidyverse)
library(here)
library(tidylog)
library(modelsummary)
library(sandwich)
library(lmtest)
library(kableExtra)

#Load custom functions
source(here("code", "fun", "fix_txt.R"))

# To access the data set:
# ICPSR 37641
# The Government Finance Database, United States, 1967-2015

# Load Data
tax_merge <- readRDS(here("data", "inter", "taxrevenue", "taxrevenue.rds"))

# Load gas visibility data
eia <- readRDS(here("data", "inter", "eiagenerator", "eia_data_final.rds"))
eia <- subset(eia, select = c(year, fips, NewGas, NewGasDistance))

# Load matched data frame
g <- readRDS("data/output/matched_df.rds")
g <- subset(g, select = c(fips, treat, post_shock, state))
g <- unique(g)

# Merge
tax.gas <- left_join(tax_merge, eia, by = c("year", "fips"))
g.out <- left_join(tax.gas, g, by = "fips")

# Subset to treated group
g.out <- subset(g.out, !is.na(treat) & year>=1992 & year <= 2012)

# Aggregate to election year
g.out <- g.out %>%
  mutate(
    eyear = case_when(
      year > 1988 & year <= 1992 ~ 1992,
      year > 1992 & year <= 1996 ~ 1996,
      year > 1996 & year <= 2000 ~ 2000,
      year > 2000 & year <= 2004 ~ 2004,
      year > 2004 & year <= 2008 ~ 2008,
      year > 2008 & year <= 2012 ~ 2012,
      T ~ NA_real_
    )
  ) %>%
  filter(!is.na(eyear)) %>%
  group_by(fips, state, eyear, treat, post_shock) %>%
  summarise(
    total_revenue = mean(total_revenue, na.rm = TRUE),
    total_taxes = mean(total_taxes, na.rm = TRUE),
    property_tax = mean(property_tax, na.rm = TRUE),
    NewGas = mean(NewGas, na.rm = TRUE),
    NewGasDistance = mean(NewGasDistance, na.rm = TRUE)
  ) %>%
  rename(year = eyear)

# Standardize explanatory variable by state and county
g.out <- g.out %>%
  group_by(fips) %>%
  mutate(NewGasDistance_c = scale(NewGasDistance)[,1]) %>%
  group_by(state) %>%
  mutate(NewGasDistance_s = scale(NewGasDistance)[,1])

# Estimate models
m <- list()
m[[1]] <- lm(log(total_revenue) ~ NewGasDistance_c + factor(year) + factor(fips), g.out)
m[[2]] <- lm(log(total_revenue) ~ NewGasDistance_c + treat * post_shock + factor(year) + factor(fips), g.out)
m[[3]] <- lm(log(total_revenue) ~ NewGasDistance_c * treat * post_shock + factor(year) + factor(fips), g.out)
m[[4]] <- lm(log(total_taxes) ~ NewGasDistance_c + factor(year) + factor(fips), g.out)
m[[5]] <- lm(log(total_taxes) ~ NewGasDistance_c + treat * post_shock + factor(year) + factor(fips), g.out)
m[[6]] <- lm(log(total_taxes) ~ NewGasDistance_c * treat * post_shock + factor(year) + factor(fips), g.out)


gof_map[gof_map$raw=="nobs",]$clean <- "$N$"
gof_map[gof_map$raw=="adj.r.squared",]$clean <- "Adjusted $R^2$"

#Table K3
tab <- here("output", "tables", "si_tab_K3_tab_revenuecor.txt")
modelsummary(
  m,
  vcov = ~ fips,
  coef_map = c(
    "(Intercept)" = "Intercept",
    "NewGasDistance_c" = "Gas Plant Distance",
    "treat"="Coal County",
    "post_shock"="Post-Shale Shock",
    "NewGasDistance_c:treat"="Gas Plant Distance $\\times$ Coal County",
    "NewGasDistance_c:post_shock"="Gas Plant Distance $\\times$ Post-Shale Shock",
    "treat:post_shock"="Coal County $\\times$ Post-Shale Shock",
    "NewGasDistance_c:treat:post_shock"="Gas Plant Distance $\\times$ Coal County $\\times$ Post-Shale Shock"
  ),
  fmt = 2,
  stars = c("*"=.1,"**"=.05,"***"=.01),
  add_rows = data.frame(
    c("Year Fixed Effects", "County Fixed Effects"),
    c("Yes", "Yes"),
    c("Yes", "Yes"),
    c("Yes", "Yes"),
    c("Yes", "Yes"),
    c("Yes", "Yes"),
    c("Yes", "Yes")
  ),
  escape = FALSE,
  output = "latex",
  gof_map = gof_map,
  gof_omit = c("R2|IC|Log|RMS|St")
) %>%
  add_header_above(c(" " = 1, "Total Revenue (log)" = 3, "Total Taxes (log)" = 3)) %>%
  cat(., file = tab)
fix_txt(tab)
